knitr::opts_knit$set(root.dir = here::here())
library(tidyverse)
library(scater)
library(scran)
library(scuttle)
library(clusterProfiler)
library(org.Mm.eg.db)
library(tidytext)
library(slingshot)
theme_set(theme_classic())Meeting 2/19/24
read_rds("~/work/JainLabSingleCell23_downstream_v2/results/single-cell-preproc/preprocessed/adult_9646_combined.cellranger.g_mito_cuts.rds") + labs(title="9646")read_rds("~/work/JainLabSingleCell23_downstream_v2/results/single-cell-preproc/preprocessed/adult_9682_combined.cellranger.g_mito_cuts.rds") + labs(title="9682")Post-integration overview
sce <- read_rds("~/work/JainLabSingleCell23_downstream_v2/results/single-cell-preproc/integrated/adult.cellranger.sce.integrated.rds")plotUMAP(sce,colour_by="celltype",text_by="celltype")plotUMAP(sce,colour_by="label2",text_by="label2")makePerCellDF(sce) |>
ggplot(aes(batch,fill=genotype)) +
geom_bar(position="dodge") +
scale_fill_grey()makePerCellDF(sce) |>
ggplot(aes(celltype,fill=genotype)) +
geom_bar(position="dodge") +
scale_fill_grey() +
facet_wrap(~batch,ncol=1) +
theme(axis.text.x = element_text(angle=45, hjust=1))makePerCellDF(sce) |>
ggplot(aes(celltype,nexprs,fill=genotype)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust=1))Germ cell trajectory
germ_sce <- read_rds("~/work/JainLabSingleCell23_downstream_v2/results/trajectory/adult.cellranger.sce.germ_cell.trajectory.rds")
germ_sce_exportable <- germ_sce
germ_sce_exportable$slingshot <- NULL
embedded <- embedCurves(germ_sce, "UMAP")
embedded <- slingCurves(embedded)[[1]] # only 1 path.
embedded <- data.frame(embedded$s[embedded$ord,])
get_ps_umap <- \(x,lab,text=NULL) plotUMAP(x, colour_by=lab, text_by=text) +
geom_path(data=embedded[seq(nrow(embedded)-300),], aes(x=Dim.1, y=Dim.2), size=1.2, arrow = arrow(length=unit(0.30,"cm"), ends="last", type = "closed")) +
labs(title=unique(x$genotype))get_ps_umap(germ_sce[,germ_sce$genotype==c("WT")],"celltype") +
get_ps_umap(germ_sce[,germ_sce$genotype==c("MUT")],"celltype")Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
get_ps_umap(germ_sce[,germ_sce$genotype==c("WT")],"label2","label2") +
get_ps_umap(germ_sce[,germ_sce$genotype==c("MUT")],"label2","label2")TRADE-seq
trade_res <- read_rds("results/tradeseq/adult.cellranger.sce.germ_cell.tradeseq.rds")
trade_res$slingshot <- NULL
library(tradeSeq)
gtade <- tradeSeq::plotSmoothers(trade_res,
counts=logcounts(trade_res),
gene="L1MdA_I_5end",
lwd=1,
nPoints=10)# --------------- dysreg in adjult vs juvenile
adult_dysreg <- names(Biostrings::readDNAStringSet("results/tradeseq/adult.cellranger.germ_cell.dysregulated_tes.fasta"))
juv_dysreg <- names(Biostrings::readDNAStringSet("results/tradeseq/juvenile_13d_wt_null.cellranger.germ_cell.dysregulated_tes.fasta"))
dysreg <- list(adult=adult_dysreg, juvenile=juv_dysreg) |>
enframe() |>
unnest(value) |>
mutate(dysregulated=T) |>
pivot_wider(names_from = name, values_from = dysregulated,values_fill = F)
filter(dysreg,str_detect(value,"^L1")) |>
gt::gt()| value | adult | juvenile |
|---|---|---|
| L1MdV_III_3end | TRUE | FALSE |
| L1MdF_III_3end | TRUE | FALSE |
| L1MdF_IV_3end | TRUE | FALSE |
| L1MdMus_I_5end | TRUE | FALSE |
| L1MdTf_II_3end | TRUE | FALSE |
| L1MdA_IV_3end | TRUE | FALSE |
| L1MdA_I_5end | FALSE | TRUE |
| L1MdA_II_5end | FALSE | TRUE |
| L1MdA_I_3end | FALSE | TRUE |
| L1MdTf_III_5end | FALSE | TRUE |
| L1MdF_I_3end | FALSE | TRUE |
| L1MdA_I_orf2 | FALSE | TRUE |
| L1Lx_I_3end | FALSE | TRUE |
| L1MdF_V_3end | FALSE | TRUE |
| L1MdA_IV_orf2 | FALSE | TRUE |
| L1MdTf_III_orf2 | FALSE | TRUE |
| L1MdMus_II_orf2 | FALSE | TRUE |
filter(dysreg,!str_detect(value,"^L1")) |>
gt::gt()| value | adult | juvenile |
|---|---|---|
| MMERVK10C | TRUE | TRUE |
| RLTR10C | TRUE | TRUE |
| B2_Mm2 | TRUE | TRUE |
| B1_Mus1 | TRUE | FALSE |
| B1_Mus2 | TRUE | FALSE |
| RLTR6-int | TRUE | FALSE |
| B1_Mm | TRUE | FALSE |
| IAPEz-int | TRUE | FALSE |
| B2_Mm1a | TRUE | FALSE |
| MMVL30-int | TRUE | TRUE |
| RLTR44-int | TRUE | FALSE |
| IAPLTR2_Mm | FALSE | TRUE |
| IAPLTR2a | FALSE | TRUE |
| B2_Mm1t | FALSE | TRUE |
intersect(adult_dysreg, juv_dysreg)[1] "MMERVK10C" "RLTR10C" "B2_Mm2" "MMVL30-int"
Differential expression
plot_ma <- function(obj,lab=NULL) {
mutate(obj,type = if_else(str_detect(feature,"ENSMUSG"),"gene","TE")) |>
arrange(desc(type)) |>
ggplot(aes(logCPM,logFC,color=type)) +
geom_point() +
labs(title=lab) +
scale_color_manual(values=c("TE"="red","gene"="gray"))
}
plot_de <- function(obj,lab=NULL) {
mutate(obj,type = if_else(str_detect(feature,"ENSMUSG"),"gene","TE")) |>
arrange(desc(type)) |>
ggplot(aes(logFC,-log10(PValue),color=type)) +
geom_point() +
labs(title=lab) +
scale_color_manual(values=c("TE"="red","gene"="gray"))
}RS 17 vs RS 1
summed.sub <- sce[,sce$label2 %in% c("17/RoundSpermatid", "1/RoundSpermatid") & sce$genotype == "MUT"]
summed.sub$label2 <- summed.sub$label2 |> factor() |> relevel(ref="1/RoundSpermatid")
between.res <- pseudoBulkDGE(summed.sub,
label=rep("dummy", ncol(summed.sub)),
design=~batch + label2,
condition=summed.sub$label2,
coef="label217/RoundSpermatid")[[1]]
between.res |>
as_tibble(rownames="feature") |>
drop_na() |>
plot_ma(lab="RS17 vs RS1")between genotype for each celltype
de <- read_tsv("results/differential_expression/adult.tbl.pseudobulk.de.tsv.gz")Rows: 49552 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): celltype, feature
dbl (5): logFC, logCPM, F, PValue, FDR
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
MA-plots
plot_ma(filter(de,celltype=="Spermatogonia"),lab = "Spermatogonia")plot_ma(filter(de,celltype=="Spermatocyte"),lab = "Spermatocyte")plot_ma(filter(de,celltype=="RoundSpermatid"),lab = "RoundSpermatid")Volcanos
plot_de(filter(de,celltype=="Spermatogonia"),lab = "Spermatogonia")plot_de(filter(de,celltype=="Spermatocyte"),lab = "Spermatocyte")plot_de(filter(de,celltype=="RoundSpermatid"),lab = "RoundSpermatid")de |>
filter(celltype == "RoundSpermatid" & FDR < 0.05) |>
arrange(-logFC) |>
dplyr::select(feature,logFC,logCPM) |>
head() |>
gt::gt()| feature | logFC | logCPM |
|---|---|---|
| ENSMUSG00000028177 | 5.323075 | 0.20278125 |
| ENSMUSG00000085126 | 4.949314 | -0.04861464 |
| ENSMUSG00000090487 | 4.460070 | 1.64918976 |
| ENSMUSG00000072884 | 4.433038 | -0.38806467 |
| ENSMUSG00000091418 | 4.396233 | 0.23513199 |
| ENSMUSG00000091718 | 4.329572 | 1.57183930 |
# confirming direction of LFCs make sense
makePerCellDF(germ_sce_exportable, features = "ENSMUSG00000090487") |>
ggplot(aes(celltype,ENSMUSG00000090487,fill=genotype)) +
geom_violin(scale = "width") +
coord_cartesian(ylim=c(0,1))GO term enrichment
go_enrichment <-filter(de,
celltype%in%c("Spermatogonia",
"Spermatocyte",
"RoundSpermatid")) %>%
filter(str_detect(feature,"ENSMUSG")) %>%
split(.,.$celltype) |>
map(~dplyr::select(.x,feature,logFC)) |>
map(deframe) |>
map(sort,decreasing=T) |>
map(~gseGO(.x,ont = "ALL",OrgDb=org.Mm.eg.db,keyType="ENSEMBL",
maxGSSize =500,
minGSSize = 10,
seed = 2022,
nPermSimple = 10000,
eps=0))Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.19% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
Warning in fgseaMultilevel(...): There were 28 pathways for which P-values were
not calculated properly due to unbalanced (positive and negative) gene-level
statistic values. For such pathways pval, padj, NES, log2err are set to NA. You
can try to increase the value of the argument nPermSimple (for example set it
nPermSimple = 100000)
Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.17% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.35% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
go_df <- go_enrichment |>
map_df(as_tibble,.id="celltype")go_df |>
filter(p.adjust < 0.05) |>
group_by(celltype,sign(NES)) |>
slice_max(-log10(pvalue),n=20) |>
group_by(celltype) |>
filter(n()>3) |>
ungroup() |>
mutate(Description=reorder_within(Description,sign(NES)*-log10(pvalue),within=celltype)) |>
ggplot(aes(-log10(pvalue)*sign(NES),Description)) +
geom_col() +
facet_wrap(~celltype,scales="free",ncol=1) +
scale_y_reordered() +
xlab("signed log10(p)")library("AnnotationDbi")
go_df |>
filter(Description=="methylated histone binding") |>
filter(celltype=="Spermatocyte") |>
pull(core_enrichment) |>
head(1) |>
str_split("/") |>
pluck(1) |>
mapIds(org.Mm.eg.db,keys=_,column="SYMBOL", keytype="ENSEMBL", multiVals="first") |>
walk(message)'select()' returned 1:many mapping between keys and columns
Zmynd11
Cbx3
Rag2
Gm20918
Gm20737
Gm20825
Gm20807
Gm21394
Gm20873
Gm20795
Gm21118
Gm20806
Gm20773
Gm21310
Gm20854
Gm20816
Gm21943
Gm20815
Gm20738
Gm20865
Gm20830
Ssty2
Gm20914
de |> filter(feature == "ENSMUSG00000034610") # tut4# A tibble: 4 × 7
celltype feature logFC logCPM F PValue FDR
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Myoid ENSMUSG00000034610 -0.163 7.24 0.178 0.674 0.963
2 RoundSpermatid ENSMUSG00000034610 0.849 4.78 17.3 0.0000322 0.000162
3 Spermatocyte ENSMUSG00000034610 0.0107 5.79 0.00540 0.941 1.00
4 Spermatogonia ENSMUSG00000034610 0.0626 8.01 0.127 0.723 1.00
de |> filter(feature == "ENSMUSG00000035248") # tut7# A tibble: 4 × 7
celltype feature logFC logCPM F PValue FDR
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Myoid ENSMUSG00000035248 -0.259 6.90 0.393 0.531 0.924
2 RoundSpermatid ENSMUSG00000035248 0.628 4.13 7.16 0.00749 0.0210
3 Spermatocyte ENSMUSG00000035248 -0.0556 5.44 0.126 0.722 1.00
4 Spermatogonia ENSMUSG00000035248 0.0125 4.82 0.00130 0.971 1.00
DE TEs
de |>
filter(FDR < 0.05) |>
mutate(type= if_else(str_detect(feature,"ENSMUSG"),"gene","TE")) |>
dplyr::count(celltype,type,direction=if_else(logFC > 0,"increased","decreased")) |>
pivot_wider(names_from = "direction",values_from = "n",values_fill = 0) |>
gt::gt(rowname_col = "celltype")| type | increased | decreased | |
|---|---|---|---|
| Elongating | gene | 1 | 0 |
| Myoid | TE | 0 | 1 |
| Myoid | gene | 17 | 37 |
| RoundSpermatid | TE | 52 | 20 |
| RoundSpermatid | gene | 2339 | 3304 |
| Spermatocyte | TE | 3 | 2 |
| Spermatocyte | gene | 164 | 62 |
| Spermatogonia | TE | 1 | 0 |
| Spermatogonia | gene | 1 | 54 |
rs_de_tes <- de |>
filter(celltype=="RoundSpermatid") |>
filter(!str_detect(feature,"ENSMUSG")) |>
mutate(direction=case_when(FDR>=0.05~"ns",
logFC >0~"increased",
logFC <0~"decreased",
T~NA)) |>
dplyr::select(celltype,feature,direction,logFC) rs_de_tes |>
filter(direction!="ns") |>
mutate(feature=fct_reorder(feature,logFC)) |>
ggplot(aes(logFC,feature)) +
geom_col() +
facet_grid(~direction,scales = "free",space = "free")DFAM hits
Note this is from mm10, but ref we’re using is GRCm39.
dfam_hits <- read_tsv("~/Downloads/mm10.nrph.hits.gz")Rows: 4542408 Columns: 15
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (4): #seq_name, family_acc, family_name, strand
dbl (11): bits, e-value, bias, hmm-st, hmm-en, ali-st, ali-en, env-st, env-e...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dfh2 <- dfam_hits |>
dplyr::select(chr=`#seq_name`,start=`ali-st`,end=`ali-en`,feature=family_name,kimura_div,seqlen=`sq-len`) |>
right_join(rs_de_tes)Joining with `by = join_by(feature)`
dfh2 |>
ggplot(aes(direction,kimura_div)) +
geom_boxplot()dfh2 |>
filter(str_detect(chr,"chrY|chrX")) |>
mutate(chr2 = if_else(str_detect(chr,"chrY"),"chrY",'chrX')) |>
ggplot(aes(direction,kimura_div)) +
geom_boxplot() +
facet_wrap(~chr2)dfh2 |>
#filter(direction == "increased") |>
filter(!str_detect(chr,"_")) |>
ggplot(aes(start,color=direction)) +
geom_density() +
facet_wrap(~chr,scales="free_x",ncol=1) +
theme(axis.text.x = element_text(angle=45,hjust=1))dfh2 |>
filter(direction!="ns") |>
group_by(feature,chr,direction,seqlen) |>
tally() |>
mutate(hits_per_mb = n/(seqlen/1e6)) |>
ggplot(aes(feature,chr,fill=hits_per_mb)) +
geom_tile() +
facet_wrap(~direction,ncol = 2,scales="free_x") +
theme(axis.text.x = element_text(angle=90,hjust=1,vjust=0.5))